import pandas as pd
# Load heart.csv from the author's local Windows path into a DataFrame.
Data = pd.read_csv(
    filepath_or_buffer=r'C:\Users\Dell\Documents\ipec\Datas\heart.csv',
)
# Preview the first five rows.
Data.head()
C:\Users\Dell\anaconda3\Lib\site-packages\pandas\core\arrays\masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). from pandas.core import (
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 63 | 1 | 3 | 145 | 233 | 1 | 0 | 150 | 0 | 2.3 | 0 | 0 | 1 | 1 |
| 1 | 37 | 1 | 2 | 130 | 250 | 0 | 1 | 187 | 0 | 3.5 | 0 | 0 | 2 | 1 |
| 2 | 41 | 0 | 1 | 130 | 204 | 0 | 0 | 172 | 0 | 1.4 | 2 | 0 | 2 | 1 |
| 3 | 56 | 1 | 1 | 120 | 236 | 0 | 1 | 178 | 0 | 0.8 | 2 | 0 | 2 | 1 |
| 4 | 57 | 0 | 0 | 120 | 354 | 0 | 1 | 163 | 1 | 0.6 | 2 | 0 | 2 | 1 |
# Print the index range, per-column non-null counts, dtypes and memory usage.
Data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 303 entries, 0 to 302 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 age 303 non-null int64 1 sex 303 non-null int64 2 cp 303 non-null int64 3 trestbps 303 non-null int64 4 chol 303 non-null int64 5 fbs 303 non-null int64 6 restecg 303 non-null int64 7 thalach 303 non-null int64 8 exang 303 non-null int64 9 oldpeak 303 non-null float64 10 slope 303 non-null int64 11 ca 303 non-null int64 12 thal 303 non-null int64 13 target 303 non-null int64 dtypes: float64(1), int64(13) memory usage: 33.3 KB
# Convert `oldpeak` to whole numbers by rounding to the nearest integer first.
# A bare astype(int) truncates toward zero (e.g. 0.8 -> 0), which silently
# discards the fractional part and turns the later round() call on this
# column into a no-op.
# NOTE(review): rounding is assumed to be the intent because the notebook
# rounds this column further down - confirm.
Data['oldpeak'] = Data['oldpeak'].round().astype(int)
# Re-print the frame summary to confirm the new dtype of `oldpeak`.
Data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 303 entries, 0 to 302 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 age 303 non-null int64 1 sex 303 non-null int64 2 cp 303 non-null int64 3 trestbps 303 non-null int64 4 chol 303 non-null int64 5 fbs 303 non-null int64 6 restecg 303 non-null int64 7 thalach 303 non-null int64 8 exang 303 non-null int64 9 oldpeak 303 non-null int32 10 slope 303 non-null int64 11 ca 303 non-null int64 12 thal 303 non-null int64 13 target 303 non-null int64 dtypes: int32(1), int64(13) memory usage: 32.1 KB
# Report the frame's dimensions (rows first, then columns).
n_rows, n_cols = Data.shape
print(f" Number of Rows:{n_rows}\n Number of Columns:{n_cols}")
Number of Rows:303 Number of Columns:14
# Show the column labels of the frame.
Data.columns
Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
dtype='object')
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
Data.describe()
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
| mean | 54.366337 | 0.683168 | 0.966997 | 131.623762 | 246.264026 | 0.148515 | 0.528053 | 149.646865 | 0.326733 | 0.765677 | 1.399340 | 0.729373 | 2.313531 | 0.544554 |
| std | 9.082101 | 0.466011 | 1.032052 | 17.538143 | 51.830751 | 0.356198 | 0.525860 | 22.905161 | 0.469794 | 1.070933 | 0.616226 | 1.022606 | 0.612277 | 0.498835 |
| min | 29.000000 | 0.000000 | 0.000000 | 94.000000 | 126.000000 | 0.000000 | 0.000000 | 71.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 47.500000 | 0.000000 | 0.000000 | 120.000000 | 211.000000 | 0.000000 | 0.000000 | 133.500000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 2.000000 | 0.000000 |
| 50% | 55.000000 | 1.000000 | 1.000000 | 130.000000 | 240.000000 | 0.000000 | 1.000000 | 153.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 2.000000 | 1.000000 |
| 75% | 61.000000 | 1.000000 | 2.000000 | 140.000000 | 274.500000 | 0.000000 | 1.000000 | 166.000000 | 1.000000 | 1.000000 | 2.000000 | 1.000000 | 3.000000 | 1.000000 |
| max | 77.000000 | 1.000000 | 3.000000 | 200.000000 | 564.000000 | 1.000000 | 2.000000 | 202.000000 | 1.000000 | 6.000000 | 2.000000 | 4.000000 | 3.000000 | 1.000000 |
# Count how often each distinct full row (combination of all columns) occurs.
Data.value_counts()
age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal target
38 1 2 138 175 0 1 173 0 0 2 4 2 1 2
59 1 0 110 239 0 0 142 1 1 1 1 3 0 1
2 126 218 1 1 134 0 2 1 1 1 0 1
1 140 221 0 1 164 1 0 2 0 2 1 1
0 170 326 0 0 140 1 3 0 0 3 0 1
..
51 1 2 94 227 0 1 154 1 0 2 1 3 1 1
0 140 299 0 1 173 1 1 2 0 3 0 1
298 0 1 122 1 4 1 3 3 0 1
261 0 0 186 1 0 2 0 2 1 1
77 1 0 125 304 0 0 162 1 0 2 3 2 0 1
Name: count, Length: 302, dtype: int64
# Round `oldpeak` to zero decimal places, then preview the first five rows.
Data['oldpeak'] = Data['oldpeak'].round()
Data.head()
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 63 | 1 | 3 | 145 | 233 | 1 | 0 | 150 | 0 | 2 | 0 | 0 | 1 | 1 |
| 1 | 37 | 1 | 2 | 130 | 250 | 0 | 1 | 187 | 0 | 3 | 0 | 0 | 2 | 1 |
| 2 | 41 | 0 | 1 | 130 | 204 | 0 | 0 | 172 | 0 | 1 | 2 | 0 | 2 | 1 |
| 3 | 56 | 1 | 1 | 120 | 236 | 0 | 1 | 178 | 0 | 0 | 2 | 0 | 2 | 1 |
| 4 | 57 | 0 | 0 | 120 | 354 | 0 | 1 | 163 | 1 | 0 | 2 | 0 | 2 | 1 |
# Frequency of each distinct `age` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['age'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 63
1 37
2 41
3 56
4 57
..
298 57
299 45
300 68
301 57
302 57
Name: age, Length: 303, dtype: int64>
# Frequency of each distinct `sex` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['sex'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 1
1 1
2 0
3 1
4 0
..
298 0
299 1
300 1
301 1
302 0
Name: sex, Length: 303, dtype: int64>
# Frequency of each distinct `cp` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['cp'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 3
1 2
2 1
3 1
4 0
..
298 0
299 3
300 0
301 0
302 1
Name: cp, Length: 303, dtype: int64>
# Frequency of each distinct `trestbps` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['trestbps'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 145
1 130
2 130
3 120
4 120
...
298 140
299 110
300 144
301 130
302 130
Name: trestbps, Length: 303, dtype: int64>
# Frequency of each distinct `chol` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['chol'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 233
1 250
2 204
3 236
4 354
...
298 241
299 264
300 193
301 131
302 236
Name: chol, Length: 303, dtype: int64>
# Frequency of each distinct `fbs` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['fbs'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 1
1 0
2 0
3 0
4 0
..
298 0
299 0
300 1
301 0
302 0
Name: fbs, Length: 303, dtype: int64>
# Frequency of each distinct `restecg` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['restecg'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 0
1 1
2 0
3 1
4 1
..
298 1
299 1
300 1
301 1
302 0
Name: restecg, Length: 303, dtype: int64>
# Frequency of each distinct `thalach` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['thalach'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 150
1 187
2 172
3 178
4 163
...
298 123
299 132
300 141
301 115
302 174
Name: thalach, Length: 303, dtype: int64>
# Frequency of each distinct `exang` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['exang'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 0
1 0
2 0
3 0
4 1
..
298 1
299 0
300 0
301 1
302 0
Name: exang, Length: 303, dtype: int64>
# Frequency of each distinct `oldpeak` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['oldpeak'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 2
1 3
2 1
3 0
4 0
..
298 0
299 1
300 3
301 1
302 0
Name: oldpeak, Length: 303, dtype: int32>
# Frequency of each distinct `slope` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['slope'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 0
1 0
2 2
3 2
4 2
..
298 1
299 1
300 1
301 1
302 1
Name: slope, Length: 303, dtype: int64>
# Frequency of each distinct `ca` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['ca'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 0
1 0
2 0
3 0
4 0
..
298 0
299 0
300 2
301 1
302 1
Name: ca, Length: 303, dtype: int64>
# Frequency of each distinct `thal` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['thal'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 1
1 2
2 2
3 2
4 2
..
298 3
299 3
300 3
301 3
302 2
Name: thal, Length: 303, dtype: int64>
# Frequency of each distinct `target` value. The call parentheses are required:
# without them the expression evaluates to the bound method, not the counts.
Data['target'].value_counts()
<bound method IndexOpsMixin.value_counts of 0 1
1 1
2 1
3 1
4 1
..
298 0
299 0
300 0
301 0
302 0
Name: target, Length: 303, dtype: int64>
# Number of missing values in each column.
Data.isna().sum()
age 0 sex 0 cp 0 trestbps 0 chol 0 fbs 0 restecg 0 thalach 0 exang 0 oldpeak 0 slope 0 ca 0 thal 0 target 0 dtype: int64
# Rows that exactly repeat an earlier row (the first occurrence is not flagged).
Data.loc[Data.duplicated()]
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 164 | 38 | 1 | 2 | 138 | 175 | 0 | 1 | 173 | 0 | 0 | 2 | 4 | 2 | 1 |
# Discard exact duplicate rows, keeping the first occurrence of each.
# The original index labels are kept (no reset).
Data = Data.drop_duplicates()
# Show the column labels again.
Data.columns
Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
dtype='object')
#! pip install missingno
import missingno as msno
# missingno bar chart for the frame (per the missingno docs this visualises
# how complete each column is).
msno.bar(Data)
<Axes: >
# Print the dtype of every column.
print(Data.dtypes)
age int64 sex int64 cp int64 trestbps int64 chol int64 fbs int64 restecg int64 thalach int64 exang int64 oldpeak int32 slope int64 ca int64 thal int64 target int64 dtype: object
import matplotlib.pyplot as plt
# Draw the summary statistics as a horizontal bar chart and save it to des.png.
des = Data.describe()
ax = des.plot.barh()
plt.savefig('des.png', bbox_inches='tight')

# First ten rows of the frame (despite the variable name, nothing is
# transposed here).
transposed_Data = Data.head(10)
transposed_Data
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 63 | 1 | 3 | 145 | 233 | 1 | 0 | 150 | 0 | 2 | 0 | 0 | 1 | 1 |
| 1 | 37 | 1 | 2 | 130 | 250 | 0 | 1 | 187 | 0 | 3 | 0 | 0 | 2 | 1 |
| 2 | 41 | 0 | 1 | 130 | 204 | 0 | 0 | 172 | 0 | 1 | 2 | 0 | 2 | 1 |
| 3 | 56 | 1 | 1 | 120 | 236 | 0 | 1 | 178 | 0 | 0 | 2 | 0 | 2 | 1 |
| 4 | 57 | 0 | 0 | 120 | 354 | 0 | 1 | 163 | 1 | 0 | 2 | 0 | 2 | 1 |
| 5 | 57 | 1 | 0 | 140 | 192 | 0 | 1 | 148 | 0 | 0 | 1 | 0 | 1 | 1 |
| 6 | 56 | 0 | 1 | 140 | 294 | 0 | 0 | 153 | 0 | 1 | 1 | 0 | 2 | 1 |
| 7 | 44 | 1 | 1 | 120 | 263 | 0 | 1 | 173 | 0 | 0 | 2 | 0 | 3 | 1 |
| 8 | 52 | 1 | 2 | 172 | 199 | 1 | 1 | 162 | 0 | 0 | 2 | 0 | 3 | 1 |
| 9 | 57 | 1 | 2 | 150 | 168 | 0 | 1 | 174 | 0 | 1 | 2 | 0 | 2 | 1 |
# First ten rows, transposed so that each feature becomes a row.
Data.head(10).transpose()
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 63 | 37 | 41 | 56 | 57 | 57 | 56 | 44 | 52 | 57 |
| sex | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 1 | 1 |
| cp | 3 | 2 | 1 | 1 | 0 | 0 | 1 | 1 | 2 | 2 |
| trestbps | 145 | 130 | 130 | 120 | 120 | 140 | 140 | 120 | 172 | 150 |
| chol | 233 | 250 | 204 | 236 | 354 | 192 | 294 | 263 | 199 | 168 |
| fbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| restecg | 0 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| thalach | 150 | 187 | 172 | 178 | 163 | 148 | 153 | 173 | 162 | 174 |
| exang | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| oldpeak | 2 | 3 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
| slope | 0 | 0 | 2 | 2 | 2 | 1 | 1 | 2 | 2 | 2 |
| ca | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| thal | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 3 | 3 | 2 |
| target | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
# Summary statistics with one row per feature and one column per statistic.
desc_Data = Data.describe().transpose()
desc_Data
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| age | 302.0 | 54.420530 | 9.047970 | 29.0 | 48.00 | 55.5 | 61.00 | 77.0 |
| sex | 302.0 | 0.682119 | 0.466426 | 0.0 | 0.00 | 1.0 | 1.00 | 1.0 |
| cp | 302.0 | 0.963576 | 1.032044 | 0.0 | 0.00 | 1.0 | 2.00 | 3.0 |
| trestbps | 302.0 | 131.602649 | 17.563394 | 94.0 | 120.00 | 130.0 | 140.00 | 200.0 |
| chol | 302.0 | 246.500000 | 51.753489 | 126.0 | 211.00 | 240.5 | 274.75 | 564.0 |
| fbs | 302.0 | 0.149007 | 0.356686 | 0.0 | 0.00 | 0.0 | 0.00 | 1.0 |
| restecg | 302.0 | 0.526490 | 0.526027 | 0.0 | 0.00 | 1.0 | 1.00 | 2.0 |
| thalach | 302.0 | 149.569536 | 22.903527 | 71.0 | 133.25 | 152.5 | 166.00 | 202.0 |
| exang | 302.0 | 0.327815 | 0.470196 | 0.0 | 0.00 | 0.0 | 1.00 | 1.0 |
| oldpeak | 302.0 | 0.768212 | 1.071799 | 0.0 | 0.00 | 0.0 | 1.00 | 6.0 |
| slope | 302.0 | 1.397351 | 0.616274 | 0.0 | 1.00 | 1.0 | 2.00 | 2.0 |
| ca | 302.0 | 0.718543 | 1.006748 | 0.0 | 0.00 | 0.0 | 1.00 | 4.0 |
| thal | 302.0 | 2.314570 | 0.613026 | 0.0 | 2.00 | 2.0 | 3.00 | 3.0 |
| target | 302.0 | 0.543046 | 0.498970 | 0.0 | 0.00 | 1.0 | 1.00 | 1.0 |
# Number of missing values in each column.
Data.isna().sum()
age 0 sex 0 cp 0 trestbps 0 chol 0 fbs 0 restecg 0 thalach 0 exang 0 oldpeak 0 slope 0 ca 0 thal 0 target 0 dtype: int64
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings from the
# plotting and modelling libraries used below - consider narrowing it.
warnings.filterwarnings('ignore')
# Re-import retained from the original cell sequence (plotly.express is already
# imported above, so this is redundant but harmless).
import plotly.express as px

# Interactive bar chart of value frequencies for every column, in column order
# (age ... target). This replaces fourteen copy-pasted cells that differed only
# in the column name.
for column in Data.columns:
    # value_counts() sorts by descending frequency; keep at most the top 50.
    value_freq = Data[column].value_counts().head(50).reset_index()
    value_freq.columns = [column, 'Count']
    fig = px.bar(value_freq, x=column, y='Count', template='gridon',
                 title=column, color='Count')
    # An explicit show() is required: a bare figure expression is only rendered
    # when it is the last expression of a notebook cell, never inside a loop or
    # in a plain script.
    fig.show()
# Histogram of every column, drawn together on one 12x12 inch figure.
Data.hist(figsize = (12, 12))
plt.show()
# Three seaborn bar plots, each with x tick labels rotated by 90 degrees:
#   1. `target` against `age`
#   2. `trestbps` against `age`
#   3. `thal` alone (only x is supplied)
# NOTE(review): the third plot passes no y variable - confirm whether a count
# plot of `thal` was intended instead.
for axes_spec in (
    {'x': 'age', 'y': 'target'},
    {'x': 'age', 'y': 'trestbps'},
    {'x': 'thal'},
):
    sns.barplot(data=Data, **axes_spec)
    plt.xticks(rotation=90)
    plt.show()
# Box plot of `target` against `age` on a 10x6 inch figure.
fig = plt.gcf()
fig.set_size_inches(10, 6)
sns.boxplot(data=Data, x='age', y='target')
plt.xticks(rotation=90)
plt.show()

# One single-variable box plot per column, each on its own 10x6 inch figure
# with x tick labels rotated by 90 degrees.
for feature in (
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target',
):
    fig = plt.gcf()
    fig.set_size_inches(10, 6)
    sns.boxplot(data=Data, x=feature)
    plt.xticks(rotation=90)
    plt.show()
# Kernel density estimate of `sex` using bandwidth method 0.15.
sns.kdeplot(x='sex', data=Data, bw_method=0.15)
<Axes: xlabel='sex', ylabel='Density'>
# Kernel density estimate of `age` using bandwidth method 0.15.
sns.kdeplot(x='age', data=Data, bw_method=0.15)
<Axes: xlabel='age', ylabel='Density'>
# Kernel density estimate of `cp` using bandwidth method 0.15.
sns.kdeplot(x='cp', data=Data, bw_method=0.15)
<Axes: xlabel='cp', ylabel='Density'>
# Kernel density estimate of `trestbps` using bandwidth method 0.15.
sns.kdeplot(x='trestbps', data=Data, bw_method=0.15)
<Axes: xlabel='trestbps', ylabel='Density'>
# Kernel density estimate of `chol` using bandwidth method 0.15.
sns.kdeplot(x='chol', data=Data, bw_method=0.15)
<Axes: xlabel='chol', ylabel='Density'>
# Kernel density estimate of `fbs` using bandwidth method 0.15.
sns.kdeplot(x='fbs', data=Data, bw_method=0.15)
<Axes: xlabel='fbs', ylabel='Density'>
# Kernel density estimate of `restecg` using bandwidth method 0.15.
sns.kdeplot(x='restecg', data=Data, bw_method=0.15)
<Axes: xlabel='restecg', ylabel='Density'>
# Kernel density estimate of `thalach` using bandwidth method 0.15.
sns.kdeplot(x='thalach', data=Data, bw_method=0.15)
<Axes: xlabel='thalach', ylabel='Density'>
# Kernel density estimate of `exang` using bandwidth method 0.15.
sns.kdeplot(x='exang', data=Data, bw_method=0.15)
<Axes: xlabel='exang', ylabel='Density'>
# Kernel density estimate of `oldpeak` using bandwidth method 0.15.
sns.kdeplot(x='oldpeak', data=Data, bw_method=0.15)
<Axes: xlabel='oldpeak', ylabel='Density'>
# Kernel density estimate of `slope` using bandwidth method 0.15.
sns.kdeplot(x='slope', data=Data, bw_method=0.15)
<Axes: xlabel='slope', ylabel='Density'>
# Kernel density estimate of `ca` using bandwidth method 0.15.
sns.kdeplot(x='ca', data=Data, bw_method=0.15)
<Axes: xlabel='ca', ylabel='Density'>
# Kernel density estimate of `thal` using bandwidth method 0.15.
sns.kdeplot(x='thal', data=Data, bw_method=0.15)
<Axes: xlabel='thal', ylabel='Density'>
# Kernel density estimate of `target` using bandwidth method 0.15.
sns.kdeplot(x='target', data=Data, bw_method=0.15)
<Axes: xlabel='target', ylabel='Density'>
# Kernel density estimate of `age`, one curve per `sex` value.
sns.kdeplot(x='age', hue='sex', data=Data, bw_method=0.15)
<Axes: xlabel='age', ylabel='Density'>
# Kernel density estimate of `thal`, one curve per `target` value.
sns.kdeplot(x='thal', hue='target', data=Data, bw_method=0.15)
<Axes: xlabel='thal', ylabel='Density'>
# Density-scaled histogram of `age` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['age'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='age', ylabel='Density'>
# Density-scaled histogram of `sex` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['sex'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='sex', ylabel='Density'>
age 0 gendar 0 chest_pain 0 trestbps 0 chol 0 fbs 0 restecg 0 thalach 0 exang 0 oldpeak 0 slope 0 ca 0 thal 0 target
# Density-scaled histogram of `cp` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['cp'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='cp', ylabel='Density'>
# Density-scaled histogram of `trestbps` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['trestbps'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='trestbps', ylabel='Density'>
# Density-scaled histogram of `chol` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['chol'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='chol', ylabel='Density'>
# Density-scaled histogram of `fbs` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['fbs'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='fbs', ylabel='Density'>
# Density-scaled histogram of `restecg` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['restecg'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='restecg', ylabel='Density'>
# Density-scaled histogram of `thalach` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['thalach'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='thalach', ylabel='Density'>
# Density-scaled histogram of `exang` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['exang'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='exang', ylabel='Density'>
# Density-scaled histogram of `oldpeak` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['oldpeak'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='oldpeak', ylabel='Density'>
# Density-scaled histogram of `slope` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['slope'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='slope', ylabel='Density'>
# Density-scaled histogram of `ca` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['ca'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='ca', ylabel='Density'>
# Density-scaled histogram of `thal` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['thal'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='thal', ylabel='Density'>
# Density-scaled histogram of `target` with a KDE overlay.
# sns.distplot is deprecated; this is the histplot equivalent given in
# seaborn's migration notes.
sns.histplot(Data['target'], kde=True, stat='density', kde_kws={'cut': 3})
<Axes: xlabel='target', ylabel='Density'>
# Shared styling for the figures below: seaborn's default theme combined with
# a three-colour 'rocket' palette.
sns.set_theme()
sns.set_palette(sns.color_palette('rocket', 3))

# For each feature: a 30-bin histogram with a KDE overlay, coloured by `target`.
for feature in (
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal',
):
    sns.displot(
        data=Data,
        x=feature,
        hue='target',
        bins=30,
        linewidth=0,
        height=5,
        kde=True,
        aspect=1.6,
    )
    plt.show()
from sklearn.preprocessing import LabelEncoder
# Label-encode text columns so that only numeric data reaches the models.
# `le` and `col` were used without ever being defined (NameError), and the loop
# body had lost its indentation; both are restored here.
# NOTE(review): the original definition of `col` is not in this file. Selecting
# object-dtype columns is an assumption that matches the unchanged preview
# printed after this cell - confirm.
le = LabelEncoder()
col = Data.select_dtypes(include='object').columns
for i in col:
    # Cast to str first so mixed-type values are encoded uniformly.
    Data[i] = le.fit_transform(Data[i].astype(str))
Data.head()
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 63 | 1 | 3 | 145 | 233 | 1 | 0 | 150 | 0 | 2 | 0 | 0 | 1 | 1 |
| 1 | 37 | 1 | 2 | 130 | 250 | 0 | 1 | 187 | 0 | 3 | 0 | 0 | 2 | 1 |
| 2 | 41 | 0 | 1 | 130 | 204 | 0 | 0 | 172 | 0 | 1 | 2 | 0 | 2 | 1 |
| 3 | 56 | 1 | 1 | 120 | 236 | 0 | 1 | 178 | 0 | 0 | 2 | 0 | 2 | 1 |
| 4 | 57 | 0 | 0 | 120 | 354 | 0 | 1 | 163 | 1 | 0 | 2 | 0 | 2 | 1 |
# Feature matrix: every column except the last one.
x = Data.iloc[:, :-1]
# Label vector: the last column.
y = Data.iloc[:, Data.shape[1] - 1]
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state=1 fixes the shuffle.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.2, random_state=1
)
# Report the shape of each feature partition.
for label, part in (("Training samples:", Xtrain), ("Testing samples:", Xtest)):
    print(label, part.shape)
Training samples: (241, 13) Testing samples: (61, 13)
from sklearn.tree import DecisionTreeClassifier
# Fit a decision tree on the training partition and predict the test labels.
# random_state is fixed because scikit-learn permutes the candidate features at
# every split; without a seed the tree, and every metric reported below, can
# change from run to run.
clf = DecisionTreeClassifier(random_state=1)
clf = clf.fit(Xtrain, ytrain)
y_pred = clf.predict(Xtest)
from sklearn import metrics
# Test-set accuracy, expressed as a percentage.
accuracy_pct = metrics.accuracy_score(ytest, y_pred) * 100
print("Accuracy:{0}%".format(accuracy_pct))
Accuracy:77.04918032786885%
from sklearn.metrics import confusion_matrix
import seaborn as sns
import numpy as np
# Confusion matrix of true vs. predicted test labels, printed as raw counts.
cm = confusion_matrix(ytest, y_pred)
print(cm)
# Heatmap of the same matrix with each cell shown as a share of all test samples.
cm_share = cm / cm.sum()
sns.heatmap(cm_share, annot=True, fmt='.2%', cmap='Blues')
[[21 8] [ 6 26]]
<Axes: >
from sklearn.metrics import classification_report
# Per-class precision, recall, F1 and support on the test partition.
report = classification_report(ytest, y_pred)
print(report)
precision recall f1-score support
0 0.78 0.72 0.75 29
1 0.76 0.81 0.79 32
accuracy 0.77 61
macro avg 0.77 0.77 0.77 61
weighted avg 0.77 0.77 0.77 61
from sklearn.tree import DecisionTreeRegressor
# Fit a decision tree regressor on the same training data and score one
# hand-written sample.
# random_state is fixed so the fitted tree is reproducible between runs.
DtReg = DecisionTreeRegressor(random_state=1)
DtReg.fit(Xtrain, ytrain)
# One value per feature, in the same order as the training columns.
# NOTE(review): these values look like placeholders rather than realistic
# measurements - confirm.
row = [2, 4, 6, 7, 10, 30, 12, 32, 23, 15, 21, 20, 31]
# Wrap the sample in a DataFrame carrying the training column names: the model
# was fitted on a DataFrame, and predicting from a bare list triggers
# scikit-learn's "X does not have valid feature names" warning.
sample = pd.DataFrame([row], columns=Xtrain.columns)
DtReg_yhat = DtReg.predict(sample)
print(DtReg_yhat[0])
0.0